In this workbook, we show how the data manipulation steps for the LCAS data to do spatial exante analytics. The data manipulation steps include: (a) variable construction, (b) combine the LCAS with geovariables, e.g., soil grids, and (c) combine the LCAS to climate variables. We then show an interactive table that shows the merged data. We then use the data as inputs in subsequent spatial exante workflows.
We first clear the working, load all the packages and import the data from dataverse. The data is on CIMMYT CSISA dataverse: https://data.cimmyt.org/dataset.xhtml?persistentId=hdl:11529/10548507. To download the data, we use “agro’’ R package.
# ConversionsLDS$C.q306_cropLarestAreaHA=LDS$C.q306_cropLarestAreaAcre*0.405#acre to haLDS$yield_kgperha=LDS$L.tonPerHectare*1000#t/ha to kg per haLDS$L.q607_farmGatePricePerKg=LDS$L.q607_farmGatePrice/100# convert to price per kg# Calculate N, P appliedLDS$F.q51071_gradeNPKN[LDS$F.q51071_gradeNPK=="10_26_26"]=0.10LDS$F.q51071_gradeNPKN[LDS$F.q51071_gradeNPK=="12_32_16"]=0.12LDS$F.q51071_gradeNPKN[LDS$F.q51071_gradeNPK=="14_35_14"]=0.14LDS$F.q51071_gradeNPKN[LDS$F.q51071_gradeNPK=="Other20-13-13"]=0.20LDS$F.q51071_gradeNPKN[LDS$F.q51071_gradeNPK=="Other20-20-0-13"]=0.20LDS$F.q51071_gradeNPKN[LDS$F.q51071_gradeNPK=="Other20-20-13"]=0.20LDS$F.q51071_gradeNPKN=as.numeric(LDS$F.q51071_gradeNPKN)LDS$F.q51071_gradeNPKP[LDS$F.q51071_gradeNPK=="10_26_26"]=0.26LDS$F.q51071_gradeNPKP[LDS$F.q51071_gradeNPK=="12_32_16"]=0.32LDS$F.q51071_gradeNPKP[LDS$F.q51071_gradeNPK=="14_35_14"]=0.35LDS$F.q51071_gradeNPKP[LDS$F.q51071_gradeNPK=="Other20-13-13"]=0.13LDS$F.q51071_gradeNPKP[LDS$F.q51071_gradeNPK=="Other20-20-0-13"]=0.20LDS$F.q51071_gradeNPKP[LDS$F.q51071_gradeNPK=="Other20-20-13"]=0.20LDS$F.q51071_gradeNPKP=as.numeric(LDS$F.q51071_gradeNPKP)LDS$F.q51071_gradeNPKK[LDS$F.q51071_gradeNPK=="10_26_26"]=0.26LDS$F.q51071_gradeNPKK[LDS$F.q51071_gradeNPK=="12_32_16"]=0.16LDS$F.q51071_gradeNPKK[LDS$F.q51071_gradeNPK=="14_35_14"]=0.14LDS$F.q51071_gradeNPKK[LDS$F.q51071_gradeNPK=="Other20-13-13"]=0.13LDS$F.q51071_gradeNPKK[LDS$F.q51071_gradeNPK=="Other20-20-13"]=0.13LDS$F.q51071_gradeNPKK=as.numeric(LDS$F.q51071_gradeNPKK)# NPKS -----------LDS$F.q51211_gradeNPKSN[LDS$F.q51211_gradeNPKS=="16_20_0_13"]=0.16LDS$F.q51211_gradeNPKSN[LDS$F.q51211_gradeNPKS=="20_20_0_13"]=0.20LDS$F.q51211_gradeNPKSN=as.numeric(LDS$F.q51211_gradeNPKSN)LDS$F.q51211_gradeNPKSP[LDS$F.q51211_gradeNPKS=="16_20_0_13"]=0.16LDS$F.q51211_gradeNPKSP[LDS$F.q51211_gradeNPKS=="20_20_0_13"]=0.20LDS$F.q51211_gradeNPKSP=as.numeric(LDS$F.q51211_gradeNPKSP)LDS$F.q51211_gradeNPKSK[LDS$F.q51211_gradeNPKS=="16_20_0_13"]=0.16LDS$F.q51211_gradeNPKSK[LDS$F.q51211_gradeNPKS=="20_20_0_13"]=0.20LDS$F.q51211_gradeNPKSK=as.numeric(LDS$F.q51211_gradeNPKSK)LDS$F.q51211_gradeNPKSS[LDS$F.q51211_gradeNPKS=="16_20_0_13"]=0.13LDS$F.q51211_gradeNPKSS[LDS$F.q51211_gradeNPKS=="20_20_0_13"]=0.13LDS$F.q51211_gradeNPKSS=as.numeric(LDS$F.q51211_gradeNPKSS)# Nutrient Content ----------------------# Taken from Cedrez, Chamberlain, Guo and Hijmans, p3### N -----------------------------------LDS$F.totAmtDAPN=LDS$F.totAmtDAP*0.18LDS$F.totAmtUreaN=LDS$F.totAmtUrea*0.46LDS$F.totAmtNPKN=LDS$F.totAmtNPK*LDS$F.q51071_gradeNPKNLDS$F.totAmtTSPN=LDS$F.totAmtTSP*0LDS$F.totAmtSSPN=LDS$F.totAmtSSP*0LDS$F.totAmtNPKSN=LDS$F.totAmtNPKS*LDS$F.q51211_gradeNPKSNLDS$N=rowSums(LDS[,c("F.totAmtDAPN","F.totAmtUreaN","F.totAmtNPKN","F.totAmtTSPN","F.totAmtSSPN","F.totAmtNPKSN")],na.rm =TRUE)LDS$Nperha=LDS$N/LDS$C.q306_cropLarestAreaHALDS$NperhaSq=LDS$Nperha*LDS$Nperha### P ------------------------------------LDS$F.totAmtDAPP=LDS$F.totAmtDAP*0.46LDS$F.totAmtUreaP=LDS$F.totAmtUrea*0LDS$F.totAmtNPKP=LDS$F.totAmtNPK*LDS$F.q51071_gradeNPKPLDS$F.totAmtTSPP=LDS$F.totAmtTSP*0.45LDS$F.totAmtSSPP=LDS$F.totAmtSSP*0.2LDS$F.totAmtNPKSP=LDS$F.totAmtNPKS*LDS$F.q51211_gradeNPKSPLDS$P2O5=rowSums(LDS[,c("F.totAmtDAPP","F.totAmtUreaP","F.totAmtNPKP","F.totAmtTSPP","F.totAmtSSPP","F.totAmtNPKSP")],na.rm =TRUE)LDS$P2O5perha=LDS$P2O5/LDS$C.q306_cropLarestAreaHA# Creating dummy variables ------------------------LDS$A.q111_fGenderdum[LDS$A.q111_fGender=="female"]=1LDS$A.q111_fGenderdum[LDS$A.q111_fGender=="male"]=0varieties=read.csv("LDS wheat variety maturity class.csv")LDS=merge(LDS,varieties, by="D.q410_varName",all.x=TRUE)LDS$variety_type_NMWV[LDS$variety_type=="NMWV"]=1LDS$variety_type_NMWV[LDS$variety_type=="EMWV"]=0LDS$variety_type_NMWV=as.numeric(LDS$variety_type_NMWV)# Sowing time new --------------------------------------------------------------LDS$Sowdate=LDS$D.seedingSowingTransplantinglibrary(tidyr)LDS=LDS %>%separate(Sowdate, c("Sday","Smonth", "Syear"))table(LDS$Sday)
The survey data contains approximate GPS locations of the plots. We can use these to extract soil and climate variables that are then included in crop response function.
# Function to add Geo-variables library(sf)
Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(sp)library(rgdal)
Please note that rgdal will be retired by the end of 2023,
plan transition to sf/stars/terra functions using GDAL and PROJ
at your earliest convenience.
rgdal: version: 1.5-29, (SVN revision 1165M)
Geospatial Data Abstraction Library extensions to R successfully loaded
Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
Path to GDAL shared files: C:/Users/MMKONDIWA/OneDrive - CIMMYT/Documents/R/win-library/4.1/rgdal/gdal
GDAL binary built with GEOS: TRUE
Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
Path to PROJ shared files: C:/Users/MMKONDIWA/OneDrive - CIMMYT/Documents/R/win-library/4.1/rgdal/proj
PROJ CDN enabled: FALSE
Linking to sp version:1.4-6
To mute warnings of possible GDAL/OSR exportToProj4() degradation,
use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
Overwritten PROJ_LIB was C:/Users/MMKONDIWA/OneDrive - CIMMYT/Documents/R/win-library/4.1/rgdal/proj
library(terra)
terra 1.5.21
Attaching package: 'terra'
The following object is masked from 'package:rgdal':
project
The following object is masked from 'package:ggplot2':
arrow
The following object is masked from 'package:tidyr':
extract
The following object is masked from 'package:dplyr':
src
library(raster)
Attaching package: 'raster'
The following object is masked from 'package:dplyr':
select
library(geodata)# add_secondary_lcas <- function (df) {# # Remove duplicates and NAs in geo-coordinates# #df=subset(df,!(duplicated(df$O.largestPlotGPS.Longitude)))# #df=subset(df,!(duplicated(df$O.largestPlotGPS.Latitude)))# df=subset(df,!(is.na(df$O.largestPlotGPS.Longitude)))# df=subset(df,!(is.na(df$O.largestPlotGPS.Latitude)))# df_sp= SpatialPointsDataFrame(cbind(df$O.largestPlotGPS.Longitude,df$O.largestPlotGPS.Latitude),data=df,proj4string=CRS("+proj=longlat +datum=WGS84"))# df_sf=st_as_sf(df_sp)# # population=population(2020,05,path=tempdir())# population_geodata=terra::extract(population,vect(df_sf),fun=mean,df=TRUE)# elevationglobal_geodata=elevation_global(0.5,path=tempdir())# elevation_geodata=terra::extract(elevationglobal_geodata,vect(df_sf),fun=mean,df=TRUE)# Soilsand=soil_world("sand",depth=5,path=tempdir())# Soilsand_lds=terra::extract(Soilsand,vect(df_sf),fun=mean,df=TRUE)# Totalnitrogen=soil_world("nitrogen",depth=5,path=tempdir())# Totalnitrogen_lds=terra::extract(Totalnitrogen,vect(df_sf),fun=mean,df=TRUE)# soilsoc=soil_world("soc",depth=15,path=tempdir())# soilsoc_lds=terra::extract(soilsoc,vect(df_sf),fun=mean,df=TRUE)# # # Merge all soils and population# geodata_df <- list(population_geodata,elevation_geodata,Soilsand_lds,Totalnitrogen_lds,soilsoc_lds)# geodata_df=Reduce(function(x, y) merge(x, y, all=TRUE),geodata_df)# #geodata_df=return(data.frame(geodata_df))# write.csv(geodata_df,paste0("geovariables",".csv"))# }# add_secondary_lcas(LDS)library(rio)geovariables=import("geovariables.csv")LDS=cbind(LDS,geovariables)
4 Climate variables
The geodata R package has aggregated rainfall and temperature variables. However, we need climate variables specific to the corresponding growing season.
The following object is masked from 'package:raster':
shift
The following object is masked from 'package:terra':
shift
The following objects are masked from 'package:lubridate':
hour, isoweek, mday, minute, month, quarter, second, wday, week,
yday, year
The following objects are masked from 'package:dplyr':
between, first, last
library(exactextractr)#RUN ONCE# add_temp_precip_lcas <- function (df) {# # Remove duplicates and NAs in geo-coordinates# #df=subset(df,!(duplicated(df$O.largestPlotGPS.Longitude)))# #df=subset(df,!(duplicated(df$O.largestPlotGPS.Latitude)))# df=subset(df,!(is.na(df$O.largestPlotGPS.Longitude)))# df=subset(df,!(is.na(df$O.largestPlotGPS.Latitude)))# df_sp= SpatialPointsDataFrame(cbind(df$O.largestPlotGPS.Longitude,df$OlargestPlotGPS.Latitude),data=df,proj4string=CRS("+proj=longlat +datum=WGS84"))# # df_sf=st_as_sf(df_sp)# version = "501"# start.yr = 1960# num.yrs = ifelse(version=="501", (2017-start.yr+1), (2010-start.yr+1))# udel.temp.filename = paste0("air.mon.mean.v",version,".nc")# udel.precip.filename = paste0("precip.mon.total.v",version,".nc")# # Output location to write results to# out.filename = paste0("UDel.aggregated.public.v",version,".csv")# out.filename2017 = paste0("UDel.aggregated2017.public.v",version,".csv")# yr.offset = start.yr-1900# temps = subset(brick(udel.temp.filename), (yr.offset*12+1):(12*(yr.offset+num.yrs)))# precip = subset(brick(udel.precip.filename), (yr.offset*12+1):(12*(yr.offset+num.yrs)))# # 1. Aggregate across months within a year: mean for temp, sum for precip# annual.temps = stackApply(temps, indices = rep(1:num.yrs, each=12), fun=mean)# annual.precip = stackApply(precip, indices = rep(1:num.yrs, each=12), fun=sum)# # 2. Aggregate spatially.# annual.temps = rotate(annual.temps)# annual.precip = rotate(annual.precip)# # df_sf$idmatching=1:nrow(df_sf)# # # Aggregate temperatures# ctry.temps = rbindlist(lapply(1:num.yrs, FUN=function(yr) {# ctry.temps = extract(annual.temps[[yr]], df_sf)# # Create data.table of results for this year, including the year# return(data.table(hhid=df_sf$idmatching, temp=ctry.temps, yr=yr-1+start.yr))# }))# # #Aggregate precipitation# # Note here we're going to multiply precip data by 10.# # The UDel data is in cm/year, but Burke et al use mm/year.# ctry.precip = rbindlist(lapply(1:num.yrs, FUN=function(yr) {# cropped.precip = annual.precip[[yr]]*10# ctry.precip = extract(cropped.precip, df_sf)# # Create data.table of results for this year, including the year# return(data.table(hhid=df_sf$idmatching, precip=ctry.precip, yr=yr-1+start.yr))# }))# # # Combine these results and save# all.udel.data = merge(ctry.temps, ctry.precip, by=c("hhid", "yr"))# all.udel.data_2017=subset(all.udel.data,all.udel.data$yr=="2017")# fwrite(all.udel.data, out.filename)# fwrite(all.udel.data_2017, out.filename2017)# }# # add_temp_precip_lcas(LDS)## Temperature and Rainfall -------------------tempprecip=read.csv("UDel.aggregated2017.public.v501.csv")tempprecipall=read.csv("UDel.aggregated.public.v501.csv")tempprecipallwide=reshape(tempprecipall, direction ="wide", idvar ="hhid", timevar ="yr")tempprecipallwide_small=subset(tempprecipallwide, select=c("precip.2007","temp.2008","precip.2008","temp.2009","precip.2009","temp.2010","precip.2010","temp.2011","precip.2011","temp.2012","precip.2012","temp.2013","precip.2013","temp.2014","precip.2014","temp.2015","precip.2015","temp.2016","precip.2016","temp.2017","precip.2017"))LDS=cbind(LDS,tempprecip,tempprecipallwide_small)# Interactive table of the datalibrary(reactable)reactable(LDS)